package com.jlh.hanlp;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer;
import com.hankcs.hanlp.model.crf.CRFNERecognizer;
import com.hankcs.hanlp.model.crf.CRFPOSTagger;
import com.hankcs.hanlp.model.crf.CRFSegmenter;
import com.hankcs.hanlp.seg.CRF.CRFSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

/**
 * Scratch driver for trying out HanLP segmentation from the command line.
 *
 * <p>{@code main} currently runs only the CRF lexical-analysis demo; the person-name
 * recognition demo is kept in the class so it can be switched in by hand.
 *
 * @author mymx.jlh
 * @version 1.0.0 2018/6/7 15:57
 */
public class MainTest {

    /**
     * Entry point: runs the CRF demo.
     *
     * @param args command-line arguments (not read)
     * @throws IOException if the CRF demo fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        // Alternative demos, disabled by default. Uncomment to try them.
        //
        // First demo:
        //   System.out.println(HanLP.segment("你好，欢迎使用HanLP汉语处理包！"));
        // Standard tokenizer:
        //   List<Term> termList = StandardTokenizer.segment("商品和服务");
        //   System.out.println(termList);
        // Person-name recognition:
        //   nameRec();

        crf();
    }

    /**
     * Builds a {@link CRFLexicalAnalyzer} from three model files on the local disk and
     * prints the segmentation of two hard-coded Chinese medical report excerpts.
     *
     * <p>NOTE(review): the model paths are absolute and machine-specific, so this only
     * runs on a machine where those files exist.
     *
     * @throws IOException declared because constructing the analyzer may throw it
     */
    private static void crf() throws IOException {
        // Disabled recipe that was used alongside this demo to produce the three model
        // files loaded below (train once, then comment out again):
        //
        //   new CRFSegmenter(null).train(
        //       "C:/D-drive-78765/work/data/model/pku/medical.txt",
        //       "C:/D-drive-78765/work/data/model/perceptron/pku199801/cws-test.bin");
        //   new CRFPOSTagger(null).train(
        //       "C:/D-drive-78765/work/data/model/pku/medical.txt",
        //       "C:/D-drive-78765/work/data/model/perceptron/pku199801/pos-test.bin");
        //   new CRFNERecognizer(null).train(
        //       "C:/D-drive-78765/work/data/model/pku/medical.txt",
        //       "C:/D-drive-78765/work/data/model/perceptron/pku199801/ner-test.bin");
        //
        // Other disabled experiments that lived here: training on
        // "C:/D-drive-78765/work/data/model/pku/199801.txt", and building the analyzer from
        // HanLP.Config.CRFCWSModelPath / CRFPOSModelPath / CRFNERModelPath to run
        // analyze(...) and seg(...) on general sentences, including a Traditional Chinese one.

        String segmenterModel = "C:/D-drive-78765/work/data/model/perceptron/pku199801/cws-test.bin";
        String posTaggerModel = "C:/D-drive-78765/work/data/model/perceptron/pku199801/pos-test.bin";
        String nerModel = "C:/D-drive-78765/work/data/model/perceptron/pku199801/ner-test.bin";
        CRFLexicalAnalyzer lexer = new CRFLexicalAnalyzer(segmenterModel, posTaggerModel, nerModel);

        // Inputs are segmented and printed in this order, one result per line.
        String[] reports = {
                "(左侧甲状腺)微小乳头状癌,直径0.1cm.(右侧甲状腺)低分化癌,结合兔疫表型考虑甲状腺样癌,肿瘤直径0.8cm,(左中央区淋巴结)0/7阳性。(右中央区淋巴结)0/11阳性。(右喉返神经后)淋巴结0/4阳性。兔疫组化结果:Calcitonin散在+,CK19+,Syn+,C9A+,CD56+,TTF-1+, Thyroglobulin-,CEA+,S-100散在+,K-671%+。分子检测报告:该患者BRAF基因(V600E）和IER基因(C228T,C250)为无突变",
                "(左侧甲状腺)桥本性甲状腺炎伴结节性甲状腺肿。(右侧甲状腺)微小乳头状癌伴钙化,大小约0.5*0.3cm。(中央区巴结)0/15阳性。 Kras exon2/3/4无突变 Nras exon2/3/4无突变 TERT C228无突变 TERT C250T无突变 PIK3CA EI545K无变PIK3CAH1047R无突变 BRAF V600E无突变"
        };
        for (String report : reports) {
            System.out.println(lexer.seg(report));
        }
    }

    /**
     * Segments a fixed set of Chinese sentences with person-name recognition switched on
     * and prints the resulting term list for each sentence.
     *
     * <p>Not called from {@code main} unless the call there is uncommented.
     */
    private static void nameRec() {
        Segment nameAwareSegment = HanLP.newSegment().enableNameRecognize(true);

        String[] sentences = {
                "签约仪式前，秦光荣、李纪恒、仇和等一同会见了参加签约的企业家。",
                "王国强、高峰、汪洋、张朝阳光着头、韩寒、小四",
                "张浩和胡健康复员回家了",
                "王总和小丽结婚了",
                "编剧邵钧林和稽道青说",
                "这里有关天培的有关事迹",
                "龚学平等领导,邓颖超生前"
        };
        for (String sentence : sentences) {
            List<Term> terms = nameAwareSegment.seg(sentence);
            System.out.println(terms);
        }
    }
}
